library(haven)
library(readr)
library(data.table)
library(scales)
library(dplyr)
library(lfe)
library(stargazer)
cohort <- fread("H:/Zheng_10223/Joint/cohort_2025.csv")

# This code makes the binscatters by world area of birth and landing age:



cohort$MainParent_Income_HH_MainParentAge45_49_quintparent=as.numeric(cut(cohort$MainParent_Income_HH_MainParentAge45_49_pctparent,breaks=c(0,20,40,60,80,100),labels=c(1,2,3,4,5)))



############### WORLD AREA OF BIRTH ###############################

par(mar=c(4.5,4.7,3.5,2))

m<-matrix(c(1,2,3,4,5,6,7,7,7),nrow=3,ncol=3,byrow=TRUE)

layout(mat=m,heights=c(0.45,0.45,0.1))

# Europe 
plot(y=cohort[!ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Europe", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome,
     x=cohort[!ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Europe", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent-0.1, pch=19, col=alpha("royalblue3",0.5), cex=2.2, xlab="", ylab="Child Rank", main="Europe", ylim=c(40,80), cex.lab=1.5, cex.axis=1.5, cex.main=2, xlim=c(0.8,5.2))
points(y=cohort[ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Europe", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome, 
       x=cohort[ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Europe", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent+0.1, pch=18, col=alpha("forestgreen",0.5), cex=2.5)


# Eastern Asia
plot(y=cohort[!ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Eastern Asia", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome,
     x=cohort[!ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Eastern Asia", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent-0.1, pch=19, col=alpha("royalblue3",0.5), cex=2.2, xlab="", ylab="", main="East Asia", ylim=c(40,80), cex.lab=1.5, cex.axis=1.5, cex.main=2, xlim=c(0.8,5.2))
points(y=cohort[ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Eastern Asia", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome, 
       x=cohort[ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Eastern Asia", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent+0.1, pch=18, col=alpha("forestgreen",0.5), cex=2.5)


# Southern Asia
plot(y=cohort[!ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Southern Asia", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome,
     x=cohort[!ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Southern Asia", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent-0.1, pch=19, col=alpha("royalblue3",0.5), cex=2.2, xlab="", ylab="", main="South Asia", ylim=c(40,80), cex.lab=1.5, cex.axis=1.5, cex.main=2, xlim=c(0.8,5.2))
points(y=cohort[ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Southern Asia", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome, 
       x=cohort[ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Southern Asia", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent+0.1, pch=18, col=alpha("forestgreen",0.5), cex=2.5)

# Africa and Middle East
plot(y=cohort[!ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Africa and Middle East", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome,
     x=cohort[!ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Africa and Middle East", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent-0.1, pch=19, col=alpha("royalblue3",0.5), cex=2.2, xlab="Parent Quintile", ylab="Child Rank", main="Africa & Middle East", ylim=c(40,80), cex.lab=1.5, cex.axis=1.5, cex.main=2, xlim=c(0.8,5.2))
points(y=cohort[ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Africa and Middle East", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome, 
       x=cohort[ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Africa and Middle East", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent+0.1, pch=18, col=alpha("forestgreen",0.5), cex=2.5)

# Oceania and other Asia
plot(y=cohort[!ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Oceania and other Asia", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome,
     x=cohort[!ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Oceania and other Asia", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent-0.1, pch=19, col=alpha("royalblue3",0.5), cex=2.2, xlab="Parent Quintile", ylab="", main="Oceania", ylim=c(40,80), cex.lab=1.5, cex.axis=1.5, cex.main=2, xlim=c(0.8,5.2))
points(y=cohort[ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Oceania and other Asia", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome, 
       x=cohort[ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="Oceania and other Asia", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent+0.1, pch=18, col=alpha("forestgreen",0.5), cex=2.5)

# South and Central America
plot(y=cohort[!ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="South and Central America", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome,
     x=cohort[!ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="South and Central America", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent-0.1, pch=19, col=alpha("royalblue3",0.5), cex=2.2, xlab="Parent Quintile", ylab="", main="South & Central America", ylim=c(40,80), cex.lab=1.5, cex.axis=1.5, cex.main=2, xlim=c(0.8,5.2))
points(y=cohort[ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="South and Central America", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome, 
       x=cohort[ImmigrationCategory=="Refugee" & WORLD_AREA_BIRTH=="South and Central America", .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent+0.1, pch=18, col=alpha("forestgreen",0.5), cex=2.5)


par(mar=c(0,0,0,0))
plot(0, xlim=c(0,1), ylim=c(0,1),type="n",axes=FALSE,xlab="",ylab="")
legend(x=0.32, y=0.88,c("Non-Refugees","Refugees"), horiz=TRUE,pch=c(19,18),pt.cex=c(1.7,1.7),col=c(alpha("royalblue3",0.5),alpha("forestgreen",0.5)), bty="n", cex=2)

cohort$RefugeeLab="Non-Refugee"
cohort$RefugeeLab[cohort$ImmigrationCategory=="Refugee"]="Refugee"
# counts:
area_counts=cohort %>% group_by(MainParent_Income_HH_MainParentAge45_49_quintparent, WORLD_AREA_BIRTH,RefugeeLab) %>% summarize(count=n())

write.csv(area_counts, "H:/Zheng_10223/ToVet/Supporting/worldarea_counts.csv")


##################### Landing Age ##################################
par(mar=c(4.5,4.7,2,2))


m<-matrix(c(1,2,3,3),nrow=2,ncol=2,byrow=TRUE)

layout(mat=m,heights=c(0.45,0.45,0.01))


# Refugees
plot(y=cohort[!ImmigrationCategory=="Refugee" & LANDING_AGE %in% c(0,1,2,3,4), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome,
     x=cohort[!ImmigrationCategory=="Refugee" & LANDING_AGE %in% c(0,1,2,3,4), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent-0.1, pch=19, col=alpha("royalblue3",0.5), cex=2.8, ylab="Child Rank",xlab="Parent Quintile", main="Refugees", ylim=c(40,80), cex.lab=1.85, cex.axis=1.85, cex.main=2, xlim=c(0.8,5.2))
points(y=cohort[ImmigrationCategory=="Refugee" & LANDING_AGE %in% c(5,6,7,8,9), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome, 
       x=cohort[ImmigrationCategory=="Refugee" & LANDING_AGE %in% c(5,6,7,8,9), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent+0.1, pch=18, col=alpha("forestgreen",0.5), cex=2.8)
points(y=cohort[ImmigrationCategory=="Refugee" & LANDING_AGE %in% c(10,11,12,13,14), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome, 
       x=cohort[ImmigrationCategory=="Refugee" & LANDING_AGE %in% c(10,11,12,13,14), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent+0.05, pch=17, col=alpha("purple",0.5), cex=2.8)
points(y=cohort[ImmigrationCategory=="Refugee" & LANDING_AGE %in% c(15,16,17), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome, 
       x=cohort[ImmigrationCategory=="Refugee" & LANDING_AGE %in% c(15,16,17), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent-0.05, pch=16, col=alpha("red",0.5), cex=2.8)

# Non-Refugees
plot(y=cohort[!ImmigrationCategory!="Refugee" & LANDING_AGE %in% c(0,1,2,3,4), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome,
     x=cohort[!ImmigrationCategory!="Refugee" & LANDING_AGE %in% c(0,1,2,3,4), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent-0.1, pch=19, col=alpha("royalblue3",0.5), cex=2.8, ylab="Child Rank",xlab="Parent Quintile", main="Non-Refugees", ylim=c(40,80), cex.lab=1.85, cex.axis=1.85, cex.main=2, xlim=c(0.8,5.2))
points(y=cohort[ImmigrationCategory!="Refugee" & LANDING_AGE %in% c(5,6,7,8,9), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome, 
       x=cohort[ImmigrationCategory!="Refugee" & LANDING_AGE %in% c(5,6,7,8,9), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent+0.1, pch=18, col=alpha("forestgreen",0.5), cex=2.8)
points(y=cohort[ImmigrationCategory!="Refugee" & LANDING_AGE %in% c(10,11,12,13,14), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome, 
       x=cohort[ImmigrationCategory!="Refugee" & LANDING_AGE %in% c(10,11,12,13,14), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent+0.05, pch=17, col=alpha("purple",0.5), cex=2.8)
points(y=cohort[ImmigrationCategory!="Refugee" & LANDING_AGE %in% c(15,16,17), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$ChildIncome, 
       x=cohort[ImmigrationCategory!="Refugee" & LANDING_AGE %in% c(15,16,17), .(ChildIncome=mean(Child_Income_IND_30_34_pct, na.rm=T)), by="MainParent_Income_HH_MainParentAge45_49_quintparent"]$MainParent_Income_HH_MainParentAge45_49_quintparent-0.05, pch=16, col=alpha("red",0.5), cex=2.8)

plot(0, xlim=c(0,1), ylim=c(0,1),type="n",axes=FALSE,xlab="",ylab="")
legend(x=0.22, y=0.25,c("Ages 0-4","Ages 5-9","Ages 10-14","Ages 15+"), horiz=TRUE,pch=c(19,18,17,16),pt.cex=c(1.7,1.7,1.7,1.7),col=c(alpha("royalblue3",0.5),alpha("forestgreen",0.5),alpha("purple",0.5),alpha("red",0.5)), bty="n", cex=1.5, x.intersp = 0.5)


cohort$landingagecut=cut(cohort$LANDING_AGE, breaks=c(0,4,9,14,18),labels=c(0,5,10,15))

landingage_counts=cohort %>% group_by(MainParent_Income_HH_MainParentAge45_49_quintparent, landingagecut,refugeein) %>% summarize(meanchildrank=mean(Child_Income_IND_30_34_pct, na.rm=T),count=n())

write.csv(landingage_counts, "H:/Zheng_10223/ToVet/Supporting/landingage_counts.csv")






